import json
import pandas as pd
import re


def parse_jsonl_results(jsonl_path) -> pd.DataFrame:
    """Parse a JSONL results file into a DataFrame.

    Each non-blank line is decoded as JSON and the first entry of
    ``response.body.choices`` is read. Lines without any choices are skipped
    silently. Lines that raise while being processed are reported on stdout
    and skipped, so one bad record does not abort the whole file.

    Args:
        jsonl_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A DataFrame with the columns "全局行号" (the integer extracted from
        the record's ``custom_id``), "校对结果" (the message ``content``) and
        "推理内容" (the message ``reasoning_content``). The columns are always
        present, even when no record could be extracted.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Fixed schema: without it an empty result would produce a DataFrame with
    # no columns at all, and callers selecting a column would hit a KeyError.
    columns = ["全局行号", "校对结果", "推理内容"]
    results = []

    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue

            # Best-effort per line: anything that goes wrong with one record
            # (invalid JSON, unexpected structure) is reported and skipped.
            try:
                data = json.loads(line)
                response = data.get("response", {})
                body = response.get("body", {})
                choices = body.get("choices", [])

                if not choices:
                    continue

                message = choices[0].get("message", {})
                custom_id = data.get("custom_id", "")

                # Extract the global row number from custom_id.
                global_id = extract_global_id(custom_id)

                results.append({
                    "全局行号": global_id,
                    "校对结果": message.get("content", ""),
                    "推理内容": message.get("reasoning_content", "")
                })

            except Exception as e:
                print(f"解析错误: {e}")

    return pd.DataFrame(results, columns=columns)


def extract_global_id(custom_id) -> int:
    """Extract the global row number from a custom_id.

    Searches ``custom_id`` for the first occurrence of ``request-<digits>``
    and returns the digits as an integer.

    Args:
        custom_id: Identifier string such as ``"request-42"``. Non-string
            values (for example ``None`` from a JSON ``null``) are treated
            the same as a string that contains no row number.

    Returns:
        The parsed row number, or 0 when none can be found.
    """
    # re.search raises TypeError on non-string input; fall back to the same
    # "not found" value instead of propagating the error.
    if not isinstance(custom_id, str):
        return 0

    match = re.search(r'request-(\d+)', custom_id)
    return int(match.group(1)) if match else 0